Overview

Using Seurat vignette, here I try to analyze * Nanostring CosMx Spatial Molecular Imager ref1. nanostring-biostats.github.io/CosMx-Analysis-Scratch-Space/posts/seurat-cosmx-basics/

ref2. satijalab.org/seurat/articles/seurat5_spatial_vignette_2.html

# renv::install("future")
# renv::install("ggplot2")
# renv::install("Seurat")
# renv::install("ggthemes")
# renv::install("ggdark")

First, we load the packages necessary for this vignette.

library(Seurat)
library(future)
plan("multisession", workers = 4)
library(ggplot2)

Human Lung: Nanostring CosMx Spatial Molecular Imager

This dataset was produced using Nanostring CosMx Spatial Molecular Imager (SMI). The CosMX SMI performs multiplexed single molecule profiling, can profile both RNA and protein targets, and can be applied directly to FFPE tissues. The dataset represents 8 FFPE samples taken from 5 non-small-cell lung cancer (NSCLC) tissues, and is available for public download. The gene panel consists of 960 transcripts.

For this dataset, instead of performing unsupervised analysis, we map the Nanostring profiles to our Azimuth Healthy Human Lung reference, which was defined by scRNA-seq. We used Azimuth version 0.4.3 with the human lung reference version 1.0.0. You can download the precomputed results here, which include annotations, prediction scores, and a UMAP visualization. The median number of detected transcripts/cell is 249, which does create uncertainty for the annotation process.

ref3. github.com/satijalab/seurat/issues/6786

data.dir <- "./data/Lung5_Rep1/Lung5_Rep1-Flat_files_and_images"
mtx.file <- "./data/Lung5_Rep1/Lung5_Rep1-Flat_files_and_images/Lung5_Rep1_exprMat_file.csv"
# Path to Nanostring cell x gene matrix CSV
metadata.file <- "./data/Lung5_Rep1/Lung5_Rep1-Flat_files_and_images/Lung5_Rep1_metadata_file.csv"
# Contains metadata including cell center, area, and stain intensities
molecules.file <- "./data/Lung5_Rep1/Lung5_Rep1-Flat_files_and_images/Lung5_Rep1_tx_file.csv"
# Path to molecules file

# Path to segmentation CSV is not provided

data <- Seurat::ReadNanostring(data.dir = data.dir, type = "centroids")
nano.obj <- Seurat::CreateSeuratObject(counts = data$matrix)

fov.name.pre <- "Lung5.Rep1.Fov."
metadata.df <- read.csv(metadata.file)
fov.numbers <-  unique(metadata.df[ ,"fov"])
for (i in fov.numbers) {
  fov.name <- paste(fov.name.pre, i, sep="")
  print(fov.name)
}
## [1] "Lung5.Rep1.Fov.1"
## [1] "Lung5.Rep1.Fov.2"
## [1] "Lung5.Rep1.Fov.3"
## [1] "Lung5.Rep1.Fov.4"
## [1] "Lung5.Rep1.Fov.5"
## [1] "Lung5.Rep1.Fov.6"
## [1] "Lung5.Rep1.Fov.7"
## [1] "Lung5.Rep1.Fov.8"
## [1] "Lung5.Rep1.Fov.9"
## [1] "Lung5.Rep1.Fov.10"
## [1] "Lung5.Rep1.Fov.11"
## [1] "Lung5.Rep1.Fov.12"
## [1] "Lung5.Rep1.Fov.13"
## [1] "Lung5.Rep1.Fov.14"
## [1] "Lung5.Rep1.Fov.15"
## [1] "Lung5.Rep1.Fov.16"
## [1] "Lung5.Rep1.Fov.17"
## [1] "Lung5.Rep1.Fov.18"
## [1] "Lung5.Rep1.Fov.19"
## [1] "Lung5.Rep1.Fov.20"
## [1] "Lung5.Rep1.Fov.21"
## [1] "Lung5.Rep1.Fov.22"
## [1] "Lung5.Rep1.Fov.23"
## [1] "Lung5.Rep1.Fov.24"
## [1] "Lung5.Rep1.Fov.25"
## [1] "Lung5.Rep1.Fov.26"
## [1] "Lung5.Rep1.Fov.27"
## [1] "Lung5.Rep1.Fov.28"
## [1] "Lung5.Rep1.Fov.29"
## [1] "Lung5.Rep1.Fov.30"
rownames(metadata.df) <- paste(metadata.df$cell_ID, "_", metadata.df$fov, sep="")
nano.obj <- AddMetaData(
  object = nano.obj,
  metadata = metadata.df
)
head(nano.obj[[]])
orig.ident nCount_RNA nFeature_RNA fov cell_ID Area AspectRatio CenterX_local_px CenterY_local_px CenterX_global_px CenterY_global_px Width Height Mean.MembraneStain Max.MembraneStain Mean.PanCK Max.PanCK Mean.CD45 Max.CD45 Mean.CD3 Max.CD3 Mean.DAPI Max.DAPI
1_1 SeuratProject 23 19 1 1 1259 1.34 1027 3631 4215.889 158847.7 47 35 3473 7354 715 5755 361 845 22 731 4979 26374
2_1 SeuratProject 26 23 1 2 3723 1.45 2904 3618 6092.889 158834.7 87 60 3895 13832 18374 53158 260 1232 13 686 1110 13229
3_1 SeuratProject 74 51 1 3 2010 1.62 4026 3627 7214.889 158843.7 68 42 2892 6048 3265 37522 378 908 19 654 10482 33824
4_1 SeuratProject 60 48 1 4 3358 0.47 4230 3597 7418.889 158813.7 48 102 6189 16091 485 964 679 2322 5 582 6065 39512
5_1 SeuratProject 52 39 1 5 1213 1.00 4258 3629 7446.889 158845.7 38 38 8138 19281 549 874 566 1242 17 674 3311 30136
6_1 SeuratProject 5 5 1 6 2647 1.38 66 3622 3254.889 158838.7 72 52 5713 12617 1220 5107 433 957 11 547 4151 19269
nano.obj.original <- nano.obj

fov.name.total <- paste(fov.name.pre, "Total", sep = "")
data <- Seurat::ReadNanostring(data.dir = data.dir, type = "centroids")
cents <- SeuratObject::CreateCentroids(data$centroids)

molecules.df <- read.csv(molecules.file)
molecules.df <- dplyr::mutate(molecules.df, cell = paste(cell_ID, "_", fov, sep = ""))
# molecules.df <- readRDS("./data/Lung5_Rep1/molecules.df.RDS")
molecules.df.3columns <- molecules.df[, c(3, 4, 8)]
nano.mol <- CreateMolecules(molecules.df.3columns, key = "CosMx_")

cents.total <- subset(cents, cell = Cells(nano.obj))
molecules.df.temp <- dplyr::filter(molecules.df, cell %in% Cells(nano.obj))
molecules.df.3columns.temp <- molecules.df.temp[, c(3, 4, 8)]
nano.mol.temp <- SeuratObject::CreateMolecules(molecules.df.3columns.temp, key = "CosMx_")
coords.temp <- SeuratObject::CreateFOV(coords = list("centroids" = cents.total), type = "centroids", molecules = nano.mol.temp, assay = "RNA", key = "CosMx_", name = NULL)
nano.obj[[fov.name.total]] <- coords.temp

fov.numbers <-  unique(metadata.df[ ,"fov"])
for (i in fov.numbers) {
  # i=1
  fov.name <- paste(fov.name.pre, i, sep="")
  nano.obj.subset <- subset(nano.obj.original, subset = fov == i)
  cents.temp <- subset(cents, cell = Cells(nano.obj.subset))
  molecules.df.temp <- dplyr::filter(molecules.df, cell %in% Cells(nano.obj.subset))
  molecules.df.3columns.temp <- molecules.df.temp[, c(3, 4, 8)]
  nano.mol.temp <- SeuratObject::CreateMolecules(molecules.df.3columns.temp, key = "CosMx_")
  coords.temp <- SeuratObject::CreateFOV(coords = list("centroids" = cents.temp), type = "centroids", molecules = nano.mol.temp, assay = "RNA", key = "CosMx_", name = NULL)
  nano.obj[[fov.name]] <- coords.temp
}

cell metadata

head(nano.obj@meta.data)
orig.ident nCount_RNA nFeature_RNA fov cell_ID Area AspectRatio CenterX_local_px CenterY_local_px CenterX_global_px CenterY_global_px Width Height Mean.MembraneStain Max.MembraneStain Mean.PanCK Max.PanCK Mean.CD45 Max.CD45 Mean.CD3 Max.CD3 Mean.DAPI Max.DAPI
1_1 SeuratProject 23 19 1 1 1259 1.34 1027 3631 4215.889 158847.7 47 35 3473 7354 715 5755 361 845 22 731 4979 26374
2_1 SeuratProject 26 23 1 2 3723 1.45 2904 3618 6092.889 158834.7 87 60 3895 13832 18374 53158 260 1232 13 686 1110 13229
3_1 SeuratProject 74 51 1 3 2010 1.62 4026 3627 7214.889 158843.7 68 42 2892 6048 3265 37522 378 908 19 654 10482 33824
4_1 SeuratProject 60 48 1 4 3358 0.47 4230 3597 7418.889 158813.7 48 102 6189 16091 485 964 679 2322 5 582 6065 39512
5_1 SeuratProject 52 39 1 5 1213 1.00 4258 3629 7446.889 158845.7 38 38 8138 19281 549 874 566 1242 17 674 3311 30136
6_1 SeuratProject 5 5 1 6 2647 1.38 66 3622 3254.889 158838.7 72 52 5713 12617 1220 5107 433 957 11 547 4151 19269

Transcript counts. Here, transcript counts are in the ‘Nanostring’ assay but in other objects they may be stored in an ‘RNA’ assay.

nano.obj@assays$RNA$counts[1:5, 1:5]
## 5 x 5 sparse Matrix of class "dgCMatrix"
##      1_1 2_1 3_1 4_1 5_1
## AATK   .   .   .   .   .
## ABL1   .   .   .   .   .
## ABL2   .   .   .   .   .
## ACE    .   .   .   .   .
## ACE2   .   .   .   .   .

cluster findings

options(future.globals.maxSize= 891289600)
nano.obj <- SCTransform(nano.obj, assay = "RNA")
nano.obj <- RunPCA(nano.obj, npcs = 30, features = rownames(nano.obj))
nano.obj <- RunUMAP(nano.obj, dims = 1:30)
nano.obj <- FindNeighbors(nano.obj, reduction = "pca", dims = 1:30)
nano.obj <- FindClusters(nano.obj, resolution = 0.3)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 100149
## Number of edges: 3770105
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9373
## Number of communities: 14
## Elapsed time: 54 seconds

UMAP positions

Idents(nano.obj) <- nano.obj@meta.data[["seurat_clusters"]]
DefaultAssay(nano.obj) <- "RNA"
nano.obj@reductions$umap@cell.embeddings[1:10,]
##         umap_1    umap_2
## 1_1  6.3523229 -2.606786
## 2_1  0.2753049 -2.062907
## 3_1  3.0064806  2.684944
## 4_1  5.1988495  2.752814
## 5_1  5.2295526 -5.697406
## 6_1  5.3388604 -2.597123
## 7_1  4.7808627 -5.720571
## 8_1  7.2765411  6.987360
## 9_1  5.3787044  5.914402
## 10_1 5.7445096  6.320855
Seurat::DimPlot(nano.obj)

Seurat::FeaturePlot(nano.obj, features = c("CD8", "FOXP3", "GAD1", "SST", "GFAP", "Mean.CD45"), slot = "counts", max.cutoff = "q95", coord.fixed = TRUE)

Image names. Each slide is stored as a separate image within the object.

Images(nano.obj)
##  [1] "Lung5.Rep1.Fov.Total" "Lung5.Rep1.Fov.1"     "Lung5.Rep1.Fov.2"    
##  [4] "Lung5.Rep1.Fov.3"     "Lung5.Rep1.Fov.4"     "Lung5.Rep1.Fov.5"    
##  [7] "Lung5.Rep1.Fov.6"     "Lung5.Rep1.Fov.7"     "Lung5.Rep1.Fov.8"    
## [10] "Lung5.Rep1.Fov.9"     "Lung5.Rep1.Fov.10"    "Lung5.Rep1.Fov.11"   
## [13] "Lung5.Rep1.Fov.12"    "Lung5.Rep1.Fov.13"    "Lung5.Rep1.Fov.14"   
## [16] "Lung5.Rep1.Fov.15"    "Lung5.Rep1.Fov.16"    "Lung5.Rep1.Fov.17"   
## [19] "Lung5.Rep1.Fov.18"    "Lung5.Rep1.Fov.19"    "Lung5.Rep1.Fov.20"   
## [22] "Lung5.Rep1.Fov.21"    "Lung5.Rep1.Fov.22"    "Lung5.Rep1.Fov.23"   
## [25] "Lung5.Rep1.Fov.24"    "Lung5.Rep1.Fov.25"    "Lung5.Rep1.Fov.26"   
## [28] "Lung5.Rep1.Fov.27"    "Lung5.Rep1.Fov.28"    "Lung5.Rep1.Fov.29"   
## [31] "Lung5.Rep1.Fov.30"

Positions in space, here shown for one image / slide

nano.obj@images[[Images(nano.obj)[1]]]$centroids@coords[1:20,] # In this object, this is equivalent to: seu.obj@images$Run1000.S1.Half$centroids@coords[1:10,]
##              x        y
##  [1,] 4215.889 158847.7
##  [2,] 6092.889 158834.7
##  [3,] 7214.889 158843.7
##  [4,] 7418.889 158813.7
##  [5,] 7446.889 158845.7
##  [6,] 3254.889 158838.7
##  [7,] 3902.889 158820.7
##  [8,] 3992.889 158832.7
##  [9,] 4067.889 158827.7
## [10,] 4111.889 158842.7
## [11,] 4254.889 158841.7
## [12,] 4495.889 158830.7
## [13,] 4805.889 158845.7
## [14,] 5023.889 158839.7
## [15,] 5233.889 158828.7
## [16,] 5612.889 158823.7
## [17,] 6215.889 158828.7
## [18,] 6285.889 158850.7
## [19,] 6338.889 158830.7
## [20,] 6403.889 158845.7

Plot data in space

Within the Seurat object, each slide is stored as a separate ‘image’ or ‘fov’. This is an unfortunate naming convention difference between CosMx nomenclature and the Seurat package. What Seurat refers to as an ‘fov’ is what NanoString refers to as a slide. When plotting cells in space, you need to specify the Seurat ‘fov’ to plot, and this is equivalent to choosing which CosMx slide to plot.

Plot all cells on one slide in space, coloring by cell type.

Get name of the first image

image1 <- Images(nano.obj)[1]

Plot all cells.

We recommend setting the border color to ‘NA’ as the default ‘white’ often masks all cells when zoomed out, leading to a fully white plot.

p1 <- ImageFeaturePlot(nano.obj, features = "Mean.CD45", border.color = "black")
p2 <- ImageDimPlot(nano.obj, molecules = "Mean.CD45", nmols = 10000, alpha = 1, mols.cols = "red")
p1 + p2

Plot the location of individual transcripts with the ‘molecules’ option.

ImageDimPlot(nano.obj,
             fov = Images(nano.obj)[1],
             border.color = "black",
             alpha = 0.9, # Reduce alpha of cell fills to better visualize the overlaying molcules
             molecules = NA,
             mols.size = 0.2,
             nmols = 100000, # Set the total number of molecules to visualize
             axes = TRUE)

(divingintogeneticsandgenomics.com/post/how-to-construct-a-spatial-object-in-seurat/)[https://divingintogeneticsandgenomics.com/post/how-to-construct-a-spatial-object-in-seurat/]

## This gives you the image 
ggplot2::ggplot(metadata.df, aes(x= CenterX_global_px, y = CenterY_global_px))+
        ggplot2::geom_point(size = 0.1, color = "grey") + ggplot2::coord_fixed() + 
        ggdark::dark_mode(ggthemes::theme_fivethirtyeight())

ggplot2::ggplot(metadata.df, aes(x= CenterX_global_px, y = CenterY_global_px))+
        ggplot2::geom_point(size = 0.1, color = "grey") + ggplot2::coord_fixed()

Some helper function:

library(magrittr)

multigene_expression_df <- function(obj, assay = "RNA", layer = "count", 
                                    vector.genes = NULL, cells = NULL){
  i = 0
  for (gene in vector.genes){
    i = i + 1;
    df.i <- get_expression_data(obj, assay = assay, layer = layer, genes = gene, cells = NULL)
    if (i == 1){
      df <- df.i
    } else {
      df.i <- df.i[, c(1,2)]
      head(df.i)
      df <- dplyr::left_join(df.i, df, by = "cell")
    }
  }
  return(df)
}

matrix_to_expression_df<- function(x, obj){
        df <- x %>%
                as.matrix() %>% 
                as.data.frame() %>%
                tibble::rownames_to_column(var= "gene") %>%
                tidyr::pivot_longer(cols = -1, names_to = "cell", values_to = "expression") %>%
                tidyr::pivot_wider(names_from = "gene", values_from = expression) %>%
                dplyr::left_join(obj@meta.data %>% 
                                  tibble::rownames_to_column(var = "cell"))
        return(df)
}


get_expression_data <- function(obj, assay = "RNA", layer = "count", 
                               genes = NULL, cells = NULL){
        if (is.null(genes) & !is.null(cells)){
                df <- Seurat::GetAssayData(obj, assay = assay, layer = layer)[, cells, drop = FALSE] %>%
                        matrix_to_expression_df(obj = obj)
        } else if (!is.null(genes) & is.null(cells)){
                df <- Seurat::GetAssayData(obj, assay = assay, layer = layer)[genes, , drop = FALSE] %>%
                        matrix_to_expression_df(obj = obj)
        } else if (is.null(genes) & is.null(cells)){
                df <- Seurat::GetAssayData(obj, assay = assay, layer = layer)[, , drop = FALSE] %>%
                        matrix_to_expression_df(obj = obj)
        } else {
                df <- Seurat::GetAssayData(obj, assay = assay, layer = layer)[genes, cells, drop = FALSE] %>%
                        matrix_to_expression_df(obj = obj)
        }
        return(df)
}

helper function for Dim graph

ImageDimPlotGenesPerCell <- function(df, genes = NULL, colors = "glasbey", size = 0.1){
  # "alphabet", "alphabet2", "glasbey", "polychrome", "stepped", and "parade"
  color.palette <- c("alphabet", "alphabet2", "glasbey", "polychrome", "stepped", "parade")
  if (colors %in% color.palette){
    colors <- Seurat::DiscretePalette(32, palette = colors)
  }
  
  q95 <- quantile(df[[genes]], 0.95)
  ggDim <- ggplot2::ggplot(df, aes(x= CenterX_global_px, y=CenterY_global_px)) + 
    ggplot2::scale_color_gradient(low = "grey", high = colors[1], limit = c(0, q95)) + 
    ggplot2::geom_point(aes(color = df[[genes]]), size = size, alpha = 1, shape = 19) + 
    ggplot2::coord_fixed() + 
    ggdark::dark_theme_gray(base_family = "sans", base_size = 10) + 
    theme(plot.title = element_text(family = "sans"), 
          plot.background = element_rect(fill = "grey10"), 
          panel.background = element_blank(), 
          panel.grid.major = element_line(color = "grey30", size = 0.2), 
          panel.grid.minor = element_line(color = "grey30", size = 0.2), 
          legend.background = element_blank(), axis.ticks = element_blank(), 
          axis.line = element_line(size = 0.5), legend.key = element_blank())
  ggDim
}

Trial to use functions

EPCAM.df <- get_expression_data(nano.obj, assay = "RNA", layer = "counts", genes = "EPCAM")
FOXP3.df <- get_expression_data(nano.obj, assay = "RNA", layer = "counts", genes = "FOXP3")
genes.df <- multigene_expression_df(nano.obj, assay = "RNA", layer = "count", 
                                    vector.genes = c("EPCAM", "FOXP3"), cells = NULL)
# quantile(genes.df[["EPCAM"]], 0.95)
ggg <- ImageDimPlotGenesPerCell(genes.df, genes = "EPCAM", colors = "blue", size = 0.1)
ggg

 ggplot2::ggplot(genes.df, aes(x= CenterX_global_px, y=CenterY_global_px)) +
        ggplot2::scale_color_gradient(low = "grey", high = "red", limit = c(0, 8)) +
        ggplot2::geom_point(aes(color = EPCAM), size = 0.1, alpha = 1, shape = 19) + 
        ggplot2::coord_fixed() + 
        ggdark::dark_theme_gray(base_family = "sans", base_size = 10) + 
        theme(plot.title = element_text(family = "sans"),
        plot.background = element_rect(fill = "grey10"),
        panel.background = element_blank(),
        panel.grid.major = element_line(color = "grey30", size = 0.2),
        panel.grid.minor = element_line(color = "grey30", size = 0.2),
        legend.background = element_blank(),
        axis.ticks = element_blank(),
        axis.line = element_line(size = 0.5),
        legend.key = element_blank())

helper function for molecules

position_of_molecules <- function(obj, fov, genes){
  i = 0
  for (gene in genes){
    i = i + 1
    molecules.df.extracted <- 
      data.frame(obj@images[[fov]]@molecules[["molecules"]][[gene]]@coords)
    colnames(molecules.df.extracted) <- c("CenterX_global_px","CenterY_global_px")
    molecules.df.extracted <- dplyr::mutate(molecules.df.extracted, gene.name = gene)
    if (i == 1){
      molecules.df.extracted.total <- molecules.df.extracted
    } else {
      molecules.df.extracted.total <- 
        rbind(molecules.df.extracted.total, molecules.df.extracted)
    }
  }
  return(molecules.df.extracted.total)
}


# test.df1 <- data.frame(nano.obj@images[["Lung5.Rep1.Fov.Total"]]@molecules[["molecules"]][["AATK"]]@coords)
# colnames(test.df1) <- c("CenterX_global_px","CenterY_global_px")
# test.df1 <- dplyr::mutate(test.df1, gene.name = "AATK")

# test.df2 <- data.frame(nano.obj@images[["Lung5.Rep1.Fov.Total"]]@molecules[["molecules"]][["EPCAM"]]@coords)
# colnames(test.df2) <- c("CenterX_global_px","CenterY_global_px")
# test.df2 <- dplyr::mutate(test.df2, gene.name = "EPCAM")

# test.df <- rbind(test.df1, test.df2)
test.df <- position_of_molecules(nano.obj, fov = "Lung5.Rep1.Fov.Total", genes = c("AATK", "EPCAM", "TAGLN"))
# class(test.df) # "matrix""array"
# ggplot2::ggplot(data = test.df, aes(CenterX_global_px, CenterY_global_px)) +  ggplot2::geom_point(shape = 19, size = 0.5, color = "green")
# ggg + geom_point(data = test.df, aes(CenterX_global_px, CenterY_global_px), 
#                  shape = 19, size = 0.5, color = "green")

ImageMoleculePlot <- function(df, size = 0.1){
  gg.molecules <- ggplot2::ggplot(df, aes(x= CenterX_global_px, y=CenterY_global_px)) +
        ggplot2::geom_point(aes(color = gene.name), size = size, alpha = 1, shape = 19) + 
        ggplot2::coord_fixed() + 
        ggdark::dark_theme_gray(base_family = "sans", base_size = 10) + 
        theme(plot.title = element_text(family = "sans"),
        plot.background = element_rect(fill = "grey10"),
        panel.background = element_blank(),
        panel.grid.major = element_line(color = "grey30", size = 0.2),
        panel.grid.minor = element_line(color = "grey30", size = 0.2),
        legend.background = element_blank(),
        axis.ticks = element_blank(),
        axis.line = element_line(size = 0.5),
        legend.key = element_blank()) + 
        guides(colour = guide_legend(override.aes = list(size=2)))
  return(gg.molecules)
}

ImageMoleculePlot(test.df, size = 0.1)

# ggplot2::ggplot(test.df, aes(x= CenterX_global_px, y=CenterY_global_px)) +
#         ggplot2::geom_point(aes(color = gene.name), size = 0.1, alpha = 1, shape = 19) + 
#         ggplot2::coord_fixed() + 
#         ggdark::dark_theme_gray(base_family = "sans", base_size = 10) + 
#         theme(plot.title = element_text(family = "sans"),
#         plot.background = element_rect(fill = "grey10"),
#         panel.background = element_blank(),
#         panel.grid.major = element_line(color = "grey30", size = 0.2),
#         panel.grid.minor = element_line(color = "grey30", size = 0.2),
#         legend.background = element_blank(),
#         axis.ticks = element_blank(),
#         axis.line = element_line(size = 0.5),
#         legend.key = element_blank()) + 
#        guides(colour = guide_legend(override.aes = list(size=2)))
ImageDimClusterPlot <- function(obj, size = 0.1, colorby = "seurat_clusters"){
  Seurat.Clusters <- obj@meta.data[[colorby]]
  ggplot2::ggplot(obj@meta.data, aes(x= CenterX_global_px, y=CenterY_global_px)) +
        ggplot2::geom_point(aes(color = Seurat.Clusters), 
                            size = size, alpha = 1, shape = 19) + 
        ggplot2::coord_fixed() + 
        ggdark::dark_theme_gray(base_family = "sans", base_size = 10) + 
        theme(plot.title = element_text(family = "sans"),
        plot.background = element_rect(fill = "grey10"),
        panel.background = element_blank(),
        panel.grid.major = element_line(color = "grey30", size = 0.2),
        panel.grid.minor = element_line(color = "grey30", size = 0.2),
        legend.background = element_blank(),
        axis.ticks = element_blank(),
        axis.line = element_line(size = 0.5),
        legend.key = element_blank()) + 
        guides(colour = guide_legend(override.aes = list(size=2)))
}

ImageDimClusterPlot(nano.obj, size = 0.1, colorby = "seurat_clusters")

Get the expression data and merge with the spatial information.

df <- get_expression_data(nano.obj, assay = "RNA", layer = "counts", genes = "EPCAM")

head(df)
cell EPCAM orig.ident nCount_RNA nFeature_RNA fov cell_ID Area AspectRatio CenterX_local_px CenterY_local_px CenterX_global_px CenterY_global_px Width Height Mean.MembraneStain Max.MembraneStain Mean.PanCK Max.PanCK Mean.CD45 Max.CD45 Mean.CD3 Max.CD3 Mean.DAPI Max.DAPI nCount_SCT nFeature_SCT SCT_snn_res.0.3 seurat_clusters
1_1 0 SeuratProject 23 19 1 1 1259 1.34 1027 3631 4215.889 158847.7 47 35 3473 7354 715 5755 361 845 22 731 4979 26374 141 43 1 1
2_1 0 SeuratProject 26 23 1 2 3723 1.45 2904 3618 6092.889 158834.7 87 60 3895 13832 18374 53158 260 1232 13 686 1110 13229 142 44 9 9
3_1 0 SeuratProject 74 51 1 3 2010 1.62 4026 3627 7214.889 158843.7 68 42 2892 6048 3265 37522 378 908 19 654 10482 33824 173 53 12 12
4_1 0 SeuratProject 60 48 1 4 3358 0.47 4230 3597 7418.889 158813.7 48 102 6189 16091 485 964 679 2322 5 582 6065 39512 164 54 7 7
5_1 0 SeuratProject 52 39 1 5 1213 1.00 4258 3629 7446.889 158845.7 38 38 8138 19281 549 874 566 1242 17 674 3311 30136 159 44 1 1
6_1 0 SeuratProject 5 5 1 6 2647 1.38 66 3622 3254.889 158838.7 72 52 5713 12617 1220 5107 433 957 11 547 4151 19269 140 64 1 1
ggg1  <- ggplot2::ggplot(df, aes(x= CenterX_global_px, y=CenterY_global_px)) +
        ggplot2::scale_color_gradient(low = "grey", high = "red", limit = c(0, 8)) +
        ggplot2::geom_point(aes(color = EPCAM), size = 0.1, alpha = 1, shape = 19) + 
        ggplot2::coord_fixed() + 
        ggdark::dark_theme_gray(base_family = "sans", base_size = 10) + 
        theme(plot.title = element_text(family = "sans"),
        plot.background = element_rect(fill = "grey10"),
        panel.background = element_blank(),
        panel.grid.major = element_line(color = "grey30", size = 0.2),
        panel.grid.minor = element_line(color = "grey30", size = 0.2),
        legend.background = element_blank(),
        axis.ticks = element_blank(),
        axis.line = element_line(size = 0.5),
        legend.key = element_blank())
ImageDimPlot(nano.obj,
             fov = Images(nano.obj)[1],
             border.color = "black",
             alpha = 0.8, # Reduce alpha of cell fills to better visualize the overlaying molcules
             molecules = c("SLC17A7", "GAD1", "PLP1"),
             size = 2,
             mols.size = 0.2,
             nmols = 100000, # Set the total number of molecules to visualize
             axes = FALSE)

#DefaultAssay(nano.obj) <- "RNA"
#Seurat::Idents(nano.obj) <- "nFeature_RNA"

fun_color_range <- colorRampPalette(c("white", "tomato"))  # Create color generating function
my_colors <- fun_color_range(20)                         # Generate color range
#  [1] "#FFFF00" "#FFF100" "#FFE400" "#FFD600" "#FFC900" "#FFBB00" "#FFAE00"
#  [8] "#FFA100" "#FF9300" "#FF8600" "#FF7800" "#FF6B00" "#FF5D00" "#FF5000"
# [15] "#FF4300" "#FF3500" "#FF2800" "#FF1A00" "#FF0D00" "#FF0000"
my_colors                                                # Print color range
##  [1] "#FFFFFF" "#FFF6F5" "#FFEEEB" "#FFE6E1" "#FFDED8" "#FFD5CE" "#FFCDC4"
##  [8] "#FFC5BB" "#FFBDB1" "#FFB5A7" "#FFAC9E" "#FFA494" "#FF9C8A" "#FF9481"
## [15] "#FF8C77" "#FF836D" "#FF7B64" "#FF735A" "#FF6B50" "#FF6347"
gg1 <- ImageFeaturePlot(nano.obj,
             fov = Images(nano.obj)[1],
             border.color = "black",
             features = "nFeature_RNA")
             #size = 2) 

gg1

VlnPlot(nano.obj, features = c("nFeature_RNA", "nCount_RNA"), ncol = 2, pt.size = 0)

gg1 <- ImageDimPlot(nano.obj, fov = Images(nano.obj)[1], axes = TRUE, cols = "glasbey", border.color = "black")

# Color cells by log10totalcounts
gg2 <- ImageFeaturePlot(nano.obj,
             fov = Images(nano.obj)[1],
             border.color = "black",
            features = "Mean.DAPI", alpha = 1)

gg1 | gg2

(divingintogeneticsandgenomics.com/post/how-to-construct-a-spatial-object-in-seurat/)[https://divingintogeneticsandgenomics.com/post/how-to-construct-a-spatial-object-in-seurat/]

## This gives you the image 
ggplot2::ggplot(metadata.df, aes(x= CenterX_global_px, y = CenterY_global_px))+
        ggplot2::geom_point(size = 0.1, color = "grey") + ggplot2::coord_fixed() + 
        ggdark::dark_mode(ggthemes::theme_fivethirtyeight())

ggplot2::ggplot(metadata.df, aes(x= CenterX_global_px, y = CenterY_global_px))+
        ggplot2::geom_point(size = 0.1, color = "grey") + ggplot2::coord_fixed()

Some helper function:

library(magrittr)
matrix_to_expression_df<- function(x, obj){
        df<- x %>%
                as.matrix() %>% 
                as.data.frame() %>%
                tibble::rownames_to_column(var= "gene") %>%
                tidyr::pivot_longer(cols = -1, names_to = "cell", values_to = "expression") %>%
                tidyr::pivot_wider(names_from = "gene", values_from = expression) %>%
                dplyr::left_join(obj@meta.data %>% 
                                  tibble::rownames_to_column(var = "cell"))
        return(df)
}


get_expression_data <- function(obj, assay = "RNA", layer = "count", 
                               genes = NULL, cells = NULL){
        if (is.null(genes) & !is.null(cells)){
                df <- Seurat::GetAssayData(obj, assay = assay, layer = layer)[, cells, drop = FALSE] %>%
                        matrix_to_expression_df(obj = obj)
        } else if (!is.null(genes) & is.null(cells)){
                df <- Seurat::GetAssayData(obj, assay = assay, layer = layer)[genes, , drop = FALSE] %>%
                        matrix_to_expression_df(obj = obj)
        } else if (is.null(genes & is.null(cells))){
                df <- Seurat::GetAssayData(obj, assay = assay, layer = layer)[, , drop = FALSE] %>%
                        matrix_to_expression_df(obj = obj)
        } else {
                df<- Seurat::GetAssayData(obj, assay = assay, layer = layer)[genes, cells, drop = FALSE] %>%
                        matrix_to_expression_df(obj = obj)
        }
        return(df)
}

Get the expression data and merge with the spatial information.

df <- get_expression_data(nano.obj, assay="RNA", layer = "counts", genes = "EPCAM")

head(df)
cell EPCAM orig.ident nCount_RNA nFeature_RNA fov cell_ID Area AspectRatio CenterX_local_px CenterY_local_px CenterX_global_px CenterY_global_px Width Height Mean.MembraneStain Max.MembraneStain Mean.PanCK Max.PanCK Mean.CD45 Max.CD45 Mean.CD3 Max.CD3 Mean.DAPI Max.DAPI nCount_SCT nFeature_SCT SCT_snn_res.0.3 seurat_clusters
1_1 0 SeuratProject 23 19 1 1 1259 1.34 1027 3631 4215.889 158847.7 47 35 3473 7354 715 5755 361 845 22 731 4979 26374 141 43 1 1
2_1 0 SeuratProject 26 23 1 2 3723 1.45 2904 3618 6092.889 158834.7 87 60 3895 13832 18374 53158 260 1232 13 686 1110 13229 142 44 9 9
3_1 0 SeuratProject 74 51 1 3 2010 1.62 4026 3627 7214.889 158843.7 68 42 2892 6048 3265 37522 378 908 19 654 10482 33824 173 53 12 12
4_1 0 SeuratProject 60 48 1 4 3358 0.47 4230 3597 7418.889 158813.7 48 102 6189 16091 485 964 679 2322 5 582 6065 39512 164 54 7 7
5_1 0 SeuratProject 52 39 1 5 1213 1.00 4258 3629 7446.889 158845.7 38 38 8138 19281 549 874 566 1242 17 674 3311 30136 159 44 1 1
6_1 0 SeuratProject 5 5 1 6 2647 1.38 66 3622 3254.889 158838.7 72 52 5713 12617 1220 5107 433 957 11 547 4151 19269 140 64 1 1
ggplot2::ggplot(df, aes(x= CenterX_global_px, y=CenterY_global_px)) +
        ggplot2::scale_color_gradient(low = "grey", high = "red", limit = c(0, 8)) +
        ggplot2::geom_point(aes(color = EPCAM), size = 0.1, alpha = 1, shape = 19) + 
        ggplot2::coord_fixed() + 
        ggdark::dark_theme_gray(base_family = "sans", base_size = 10) + 
        theme(plot.title = element_text(family = "sans"),
        plot.background = element_rect(fill = "grey10"),
        panel.background = element_blank(),
        panel.grid.major = element_line(color = "grey30", size = 0.2),
        panel.grid.minor = element_line(color = "grey30", size = 0.2),
        legend.background = element_blank(),
        axis.ticks = element_blank(),
        axis.line = element_line(size = 0.5),
        legend.key = element_blank())

## This gives you the image 
ggplot2::ggplot(metadata.df, aes(x= CenterX_global_px, y = CenterY_global_px))+
        ggplot2::geom_point(size = 0.1, color = "grey") + ggplot2::coord_fixed() + 
        ggdark::dark_mode(ggthemes::theme_fivethirtyeight())

# saveRDS(nano.obj, file = "./data/Lung5_Rep1/nano.obj.RDS")

Integration with single-cell data

seurat.obj.lung <- readRDS("./data/Lung5_Rep1/braga_lung_demo.Rds")
# note that setting ncells=3000 normalizes the full dataset but learns noise models on 3k
# cells this speeds up SCTransform dramatically with no loss in performance
library(dplyr)
seurat.obj.lung <- SCTransform(seurat.obj.lung, ncells = 3000, verbose = FALSE) %>%
    RunPCA(verbose = FALSE) %>%
    RunUMAP(dims = 1:30)
# the annotation is stored in the 'celltype' column of object metadata
DimPlot(seurat.obj.lung, group.by = "celltype", label = TRUE)

# nano.obj <- readRDS("./data/Lung5_Rep1/nano.obj.RDS")
anchors <- FindTransferAnchors(reference = seurat.obj.lung, query = nano.obj, normalization.method = "SCT")
predictions.assay <- TransferData(anchorset = anchors, refdata = seurat.obj.lung$celltype, 
                                  prediction.assay = TRUE, weight.reduction = nano.obj[["pca"]], dims = 1:30)
nano.obj[["predictions"]] <- predictions.assay
# Now we get prediction scores for each spot for each class. Of particular interest in the frontal cortex region are the laminar excitatory neurons. Here we can distinguish between distinct sequential layers of these neuronal subtypes, for example:

DefaultAssay(nano.obj) <- "predictions"
# SpatialFeaturePlot(nano.obj, features = c("Mast cell", "Macrophages"), pt.size.factor = 1.6, ncol = 2, crop = TRUE)
saveRDS(nano.obj, file = "./data/Lung5_Rep1/nano.obj.predictions.RDS")
df <- get_expression_data(nano.obj, assay="predictions", layer = "data", genes = "Macrophages")

head(df)
cell Macrophages orig.ident nCount_RNA nFeature_RNA fov cell_ID Area AspectRatio CenterX_local_px CenterY_local_px CenterX_global_px CenterY_global_px Width Height Mean.MembraneStain Max.MembraneStain Mean.PanCK Max.PanCK Mean.CD45 Max.CD45 Mean.CD3 Max.CD3 Mean.DAPI Max.DAPI nCount_SCT nFeature_SCT SCT_snn_res.0.3 seurat_clusters
1_1 0.9091318 SeuratProject 23 19 1 1 1259 1.34 1027 3631 4215.889 158847.7 47 35 3473 7354 715 5755 361 845 22 731 4979 26374 141 43 1 1
2_1 0.0246827 SeuratProject 26 23 1 2 3723 1.45 2904 3618 6092.889 158834.7 87 60 3895 13832 18374 53158 260 1232 13 686 1110 13229 142 44 9 9
3_1 0.1415056 SeuratProject 74 51 1 3 2010 1.62 4026 3627 7214.889 158843.7 68 42 2892 6048 3265 37522 378 908 19 654 10482 33824 173 53 12 12
4_1 0.4911785 SeuratProject 60 48 1 4 3358 0.47 4230 3597 7418.889 158813.7 48 102 6189 16091 485 964 679 2322 5 582 6065 39512 164 54 7 7
5_1 0.9601458 SeuratProject 52 39 1 5 1213 1.00 4258 3629 7446.889 158845.7 38 38 8138 19281 549 874 566 1242 17 674 3311 30136 159 44 1 1
6_1 0.4343850 SeuratProject 5 5 1 6 2647 1.38 66 3622 3254.889 158838.7 72 52 5713 12617 1220 5107 433 957 11 547 4151 19269 140 64 1 1
ggplot2::ggplot(df, aes(x= CenterX_global_px, y=CenterY_global_px)) +
        ggplot2::scale_color_gradient(low = "grey", high = "red", limit = c(0, 8)) +
        ggplot2::geom_point(aes(color = Macrophages), size = 0.2, alpha = 1, shape = 19) + 
        ggplot2::coord_fixed() + 
        ggdark::dark_theme_gray(base_family = "sans", base_size = 10) + 
        theme(plot.title = element_text(family = "sans"),
        plot.background = element_rect(fill = "grey10"),
        panel.background = element_blank(),
        panel.grid.major = element_line(color = "grey30", size = 0.2),
        panel.grid.minor = element_line(color = "grey30", size = 0.2),
        legend.background = element_blank(),
        axis.ticks = element_blank(),
        axis.line = element_line(size = 0.5),
        legend.key = element_blank())

ImageFeaturePlot(nano.obj, features = c("Macrophages", "Fibroblast"), mols.size = 0.2, max.cutoff = "q95")

# ImageFeaturePlot(nano.obj, features = c("Macrophages", "Fibroblast"), max.cutoff = "q95")

Visualization of cell type and expression localization patterns

As in the previous example, ImageDimPlot() plots c ells based on their spatial locations, and colors them based on their assigned cell type. Notice that the basal cell population (tumor cells) is tightly spatially organized, as expected.

nano.obj <- readRDS("./data/Lung5_Rep1/nano.obj.predictions.RDS")
nano.obj$predicted.id <- GetTransferPredictions(nano.obj)
Idents(nano.obj) <- "predicted.id"
# SpatialDimPlot(nano.obj, cells.highlight = CellsByIdentities(object = nano.obj, idents = c("Macrophages", "Mast cell", "Fibroblast")), facet.highlight = TRUE)
ImageDimPlot(nano.obj, fov = Images(nano.obj)[1], axes = TRUE, cols = "glasbey", border.color = "black")

ImageDimPlot(nano.obj, fov =Images(nano.obj)[1], cells = WhichCells(nano.obj, idents = c("Mast cell", "Macrophages", "Fibroblast", "T cell")), cols = c("red", "orange", "green", "blue"), size = 0.6)

We can also visualize gene expression markers a few different ways:

DefaultAssay(nano.obj) <- "RNA"
VlnPlot(nano.obj, features = "KRT17", assay = "RNA", layer = "counts", pt.size = 0.1, y.max = 30) + NoLegend()

FeaturePlot(nano.obj, slot = "counts" ,features = "KRT17", max.cutoff = "q95")

p1 <- ImageFeaturePlot(nano.obj, fov = Images(nano.obj)[1], features = "KRT17", max.cutoff = "q95")
p2 <- ImageDimPlot(nano.obj, fov = Images(nano.obj)[1], alpha = 1, molecules = "KRT17", nmols = 10000, mols.size = 0.2)# + NoLegend()
p1 + p2

We can plot molecules in order to co-visualize the expression of multiple markers, including KRT17 (basal cells), C1QA (macrophages), IL7R (T cells), and TAGLN (Smooth muscle cells).

DefaultAssay(nano.obj) <- "RNA"
# Plot some of the molecules which seem to display spatial correlation with each other
ImageDimPlot(nano.obj, group.by = "orig.ident", alpha = 0.3, molecules = c("KRT17", "C1QA", "IL7R", "TAGLN"), nmols =20000, mols.size=1, mols.cols = "glasbey")

ImageFeaturePlot(nano.obj, fov = Images(nano.obj)[1], features = c("KRT17", "C1QA", "IL7R", "TAGLN"), max.cutoff = "q95")

We zoom in on one basal-rich region using the Crop() function. Once zoomed-in, we can visualize individual cell boundaries as well in all visualizations.

Session Info
sessionInfo()
## R version 4.4.1 (2024-06-14 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 22631)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=Japanese_Japan.utf8  LC_CTYPE=Japanese_Japan.utf8   
## [3] LC_MONETARY=Japanese_Japan.utf8 LC_NUMERIC=C                   
## [5] LC_TIME=Japanese_Japan.utf8    
## 
## time zone: Etc/GMT-9
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices datasets  utils     methods   base     
## 
## other attached packages:
## [1] dplyr_1.1.4        magrittr_2.0.3     ggplot2_3.5.1      future_1.33.2     
## [5] Seurat_5.1.0       SeuratObject_5.0.2 sp_2.1-4          
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3          jsonlite_1.8.8             
##   [3] spatstat.utils_3.0-5        farver_2.1.2               
##   [5] rmarkdown_2.27              zlibbioc_1.50.0            
##   [7] vctrs_0.6.5                 ROCR_1.0-11                
##   [9] spatstat.explore_3.2-7      S4Arrays_1.4.1             
##  [11] htmltools_0.5.8.1           ggdark_0.2.1               
##  [13] SparseArray_1.4.8           sass_0.4.9                 
##  [15] sctransform_0.4.1           parallelly_1.37.1          
##  [17] KernSmooth_2.23-24          bslib_0.7.0                
##  [19] htmlwidgets_1.6.4           ica_1.0-3                  
##  [21] plyr_1.8.9                  plotly_4.10.4              
##  [23] zoo_1.8-12                  cachem_1.1.0               
##  [25] igraph_2.0.3                mime_0.12                  
##  [27] lifecycle_1.0.4             pkgconfig_2.0.3            
##  [29] Matrix_1.7-0                R6_2.5.1                   
##  [31] fastmap_1.2.0               GenomeInfoDbData_1.2.12    
##  [33] MatrixGenerics_1.16.0       fitdistrplus_1.1-11        
##  [35] shiny_1.8.1.1               digest_0.6.36              
##  [37] colorspace_2.1-0            patchwork_1.2.0            
##  [39] S4Vectors_0.42.1            tensor_1.5                 
##  [41] RSpectra_0.16-1             irlba_2.3.5.1              
##  [43] GenomicRanges_1.56.1        labeling_0.4.3             
##  [45] progressr_0.14.0            fansi_1.0.6                
##  [47] spatstat.sparse_3.1-0       httr_1.4.7                 
##  [49] polyclip_1.10-6             abind_1.4-5                
##  [51] compiler_4.4.1              withr_3.0.0                
##  [53] fastDummies_1.7.3           highr_0.11                 
##  [55] MASS_7.3-60.2               DelayedArray_0.30.1        
##  [57] tools_4.4.1                 lmtest_0.9-40              
##  [59] httpuv_1.6.15               future.apply_1.11.2        
##  [61] goftest_1.2-3               glmGamPoi_1.16.0           
##  [63] glue_1.7.0                  nlme_3.1-164               
##  [65] promises_1.3.0              grid_4.4.1                 
##  [67] Rtsne_0.17                  cluster_2.1.6              
##  [69] reshape2_1.4.4              generics_0.1.3             
##  [71] gtable_0.3.5                spatstat.data_3.1-2        
##  [73] tidyr_1.3.1                 data.table_1.15.4          
##  [75] XVector_0.44.0              utf8_1.2.4                 
##  [77] BiocGenerics_0.50.0         spatstat.geom_3.2-9        
##  [79] RcppAnnoy_0.0.22            ggrepel_0.9.5              
##  [81] RANN_2.6.1                  pillar_1.9.0               
##  [83] stringr_1.5.1               spam_2.10-0                
##  [85] RcppHNSW_0.6.0              later_1.3.2                
##  [87] splines_4.4.1               lattice_0.22-6             
##  [89] renv_1.0.7                  survival_3.6-4             
##  [91] deldir_2.0-4                tidyselect_1.2.1           
##  [93] miniUI_0.1.1.1              pbapply_1.7-2              
##  [95] knitr_1.47                  gridExtra_2.3              
##  [97] IRanges_2.38.1              SummarizedExperiment_1.34.0
##  [99] scattermore_1.2             stats4_4.4.1               
## [101] xfun_0.45                   Biobase_2.64.0             
## [103] matrixStats_1.3.0           UCSC.utils_1.0.0           
## [105] stringi_1.8.4               lazyeval_0.2.2             
## [107] yaml_2.3.8                  evaluate_0.24.0            
## [109] codetools_0.2-20            tibble_3.2.1               
## [111] cli_3.6.3                   uwot_0.2.2                 
## [113] xtable_1.8-4                reticulate_1.38.0          
## [115] munsell_0.5.1               jquerylib_0.1.4            
## [117] Rcpp_1.0.12                 GenomeInfoDb_1.40.1        
## [119] globals_0.16.3              spatstat.random_3.2-3      
## [121] png_0.1-8                   parallel_4.4.1             
## [123] dotCall64_1.1-1             listenv_0.9.1              
## [125] ggthemes_5.1.0              viridisLite_0.4.2          
## [127] scales_1.3.0                ggridges_0.5.6             
## [129] crayon_1.5.3                leiden_0.4.3.1             
## [131] purrr_1.0.2                 rlang_1.1.4                
## [133] cowplot_1.1.3